import requests
from bs4 import BeautifulSoup
import json,csv
class BookSpider:
    """Scrape book listings from allitebooks.org and export them.

    Parsed books accumulate in ``self.data_list`` as dicts with the keys
    ``book_name``, ``book_img``, ``book_author`` and ``book_info``.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    # Output files written by ``save_data`` (relative to the working dir).
    JSON_PATH = 'bs4解析book廖.json'
    CSV_PATH = 'book.csv'

    # (dict key, CSV column title) pairs. A sequence keeps the header and
    # the row values in the same, deterministic column order.
    CSV_COLUMNS = (
        ('book_name', '书名'),
        ('book_img', '图片地址'),
        ('book_author', '作者'),
        ('book_info', '简介'),
    )

    def __init__(self):
        self.base_url = 'http://www.allitebooks.org/page/{}/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
        }
        self.data_list = []

    def get_url_list(self, first_page: int = 1, last_page: int = 9) -> list:
        """Return the listing-page URLs for pages first_page..last_page.

        Both bounds are inclusive; the defaults yield pages 1 through 9.
        """
        return [
            self.base_url.format(page)
            for page in range(first_page, last_page + 1)
        ]

    def send_request(self, url: str) -> str:
        """Fetch *url* and return the response body decoded as UTF-8.

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within REQUEST_TIMEOUT.
        """
        response = requests.get(
            url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        print(url)
        return response.content.decode()

    @staticmethod
    def _select_text(node, selector: str) -> str:
        """Return the text of the first *selector* match, or '' if none."""
        match = node.select_one(selector)
        return match.get_text() if match is not None else ''

    def parse_bs4_data(self, data: str) -> None:
        """Parse one listing page and append its books to ``self.data_list``.

        Every ``<article>`` element yields one dict. Elements missing from
        an article produce an empty string (or ``None`` for the image URL)
        instead of aborting the whole crawl.
        """
        soup = BeautifulSoup(data, 'lxml')

        for book in soup.select('article'):
            # Only one cover image is expected per article, so select_one.
            image = book.select_one('.wp-post-image')
            self.data_list.append({
                'book_name': self._select_text(book, '.entry-title'),
                'book_img': image.get('src') if image is not None else None,
                # Drop the first four characters of the author text
                # (presumably a "By: " prefix; confirm against the markup).
                'book_author': self._select_text(book, '.entry-author')[4:],
                'book_info': self._select_text(book, '.entry-summary p'),
            })

    def save_data(self) -> None:
        """Write ``self.data_list`` to JSON_PATH and to CSV_PATH.

        The CSV gets one header row followed by one row per book, with the
        columns in CSV_COLUMNS order.
        """
        with open(self.JSON_PATH, 'w', encoding='utf-8') as json_fp:
            json.dump(self.data_list, json_fp)

        keys = [key for key, _ in self.CSV_COLUMNS]
        titles = [title for _, title in self.CSV_COLUMNS]
        rows = [[book.get(key) for key in keys] for book in self.data_list]

        # newline='' lets the csv module control line endings (avoids blank
        # rows on Windows); utf-8-sig adds a BOM so spreadsheet programs
        # detect the encoding of the Chinese header.
        with open(self.CSV_PATH, 'w', encoding='utf-8-sig', newline='') as csv_fp:
            writer = csv.writer(csv_fp)
            writer.writerow(titles)
            writer.writerows(rows)

    def start(self, save: bool = False) -> None:
        """Crawl every listing page and collect the books.

        Args:
            save: when true, export the collected data with ``save_data``
                after crawling. Defaults to False (collect only).
        """
        for url in self.get_url_list():
            self.parse_bs4_data(self.send_request(url))
        if save:
            self.save_data()


# Run the crawl only when executed as a script, so that importing this
# module (e.g. to reuse BookSpider) does not trigger network requests.
if __name__ == '__main__':
    BookSpider().start()






